#!/bin/bash
# Create one working directory per ITS sequencing pool (ITS1..ITS9).
for n in 1 2 3 4 5 6 7 8 9; do
    mkdir "ITS${n}"
done
# Rename the raw paired reads inside each pool directory to the fixed
# names expected by the MultiplexedPairedEndBarcodeInSequence importer.
# BUG FIX: the original loops globbed ITS*/ inside the body instead of
# using the loop variable $i, so each mv received sources from every
# pool at once and failed.
for i in ITS*; do mv "$i"/ITS_*1.fastq.gz "$i"/forward.fastq.gz; done
for i in ITS*; do mv "$i"/ITS_*2.fastq.gz "$i"/reverse.fastq.gz; done
# Import each pool directory as a multiplexed paired-end QIIME 2 artifact.
for i in ITS*; do
    qiime tools import \
        --type MultiplexedPairedEndBarcodeInSequence \
        --input-path "$i" \
        --output-path "$i.qza"
done
# Demultiplex each pooled artifact with cutadapt using the per-pool
# forward-barcode mapping files. The nine original invocations were
# identical except for the pool number, so one loop drives them all,
# in the same order (its1 .. its9) with the same arguments.
for n in 1 2 3 4 5 6 7 8 9; do
    qiime cutadapt demux-paired \
        --i-seqs "its${n}.qza" \
        --m-forward-barcodes-file "../../maps_its/its${n}.txt" \
        --m-forward-barcodes-column BarcodeSequence \
        --output-dir "its${n}_demux"
done
# Trim the ITS2 region from each demultiplexed pool with the ITSxpress
# QIIME 2 plugin (merged output: trim-pair emits one file per sample).
for i in its*_demux ; do qiime itsxpress trim-pair --i-per-sample-sequences $i/per_sample_sequences.qza --p-region ITS2 --p-taxa F --o-trimmed itsxpress_2024_$i;done
# Export every trimmed artifact.
# NOTE(review): all artifacts are exported into the SAME "exported"
# directory, so each artifact's MANIFEST overwrites the previous one --
# confirm this pooling is intentional (the unmerged run below uses one
# exported_$i directory per artifact instead).
for i in itsxpress_2024_its*; do qiime tools export --input-path $i --output-path exported; done
# Re-import the pooled merged reads via a hand-written manifest.
qiime tools import --type 'SampleData[SequencesWithQuality]' --input-path manifest.txt --output-path single-end-demux-qiime2.qza --input-format SingleEndFastqManifestPhred33V2
# Same trimming, but keeping forward/reverse reads unmerged.
for i in its*_demux ; do qiime itsxpress trim-pair-output-unmerged --i-per-sample-sequences $i/per_sample_sequences.qza --p-region ITS2 --p-taxa F --o-trimmed itsxpress_2024_unmerged$i;done
# Export each unmerged artifact into its own directory.
for i in itsxpress_2024_unmerged*; do qiime tools export --input-path $i --output-path exported_$i; done
# Re-import the unmerged reads as a paired-end artifact.
qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' --input-path manifest.txt --output-path paired-end-demux.qza --input-format PairedEndFastqManifestPhred33V2
# Also export the raw per-sample sequences of every demultiplexed pool.
for i in its*_demux; do qiime tools export --input-path $i/per_sample_sequences.qza --output-path exported_$i; done
#!/bin/bash
# Trim the ITS2 region from every paired-end sample in the current
# directory with the standalone ITSxpress tool (merged output mode).
ITSXPRESS_EXECUTABLE="itsxpress"

for fwd in *_R1_001.fastq.gz; do
    # Sample name = forward file name without the R1 suffix; the reverse
    # file is derived from it.
    sample="$(basename "$fwd" _R1_001.fastq.gz)"
    rev="${sample}_R2_001.fastq.gz"
    # Trim this sample's pair into one merged output file.
    $ITSXPRESS_EXECUTABLE --fastq "$fwd" --fastq2 "$rev" --region ITS2 \
        --taxa Fungi --log "${sample}_logfile.txt" \
        --outfile "${sample}_trimmed_reads.fastq.gz" --threads 2
    # Report which pair was just processed.
    echo "Processing $fwd and $rev"
done
#!/bin/bash
# Trim the ITS2 region from every paired-end sample with standalone
# ITSxpress, this time keeping forward and reverse reads in separate
# output files (--outfile / --outfile2).
ITSXPRESS_EXECUTABLE="itsxpress"
for forward_file in *_R1_001.fastq.gz; do
    # Extract the sample name from the forward file.
    sample_name=$(basename "$forward_file" _R1_001.fastq.gz)
    # Construct the reverse file name.
    reverse_file="${sample_name}_R2_001.fastq.gz"
    # Run the itsxpress command for this pair.
    $ITSXPRESS_EXECUTABLE --fastq "$forward_file" --fastq2 "$reverse_file" --region ITS2 \
        --taxa Fungi --log "${sample_name}_logfile.txt" \
        --outfile "${sample_name}_trimmed_R1.fastq.gz" \
        --outfile2 "${sample_name}_trimmed_R2.fastq.gz" --threads 4
    echo "Processing $forward_file and $reverse_file"
done
# BUG FIX: "done" and the following qiime command were fused onto one
# line ("doneqiime tools import ..."), a shell syntax error; separated.
qiime tools import \
    --type 'SampleData[SequencesWithQuality]' \
    --input-path manifest.txt \
    --output-path single-end-demux-standalone.qza \
    --input-format SingleEndFastqManifestPhred33V2
# Denoise each imported demux artifact with DADA2 (single-end).
# BUG FIX: the original glob was "*", which would also feed manifests
# and other non-artifact files to qiime; restricted to .qza files.
for i in *.qza; do
    qiime dada2 denoise-single --i-demultiplexed-seqs "$i" \
        --p-trunc-len 0 --p-n-threads 4 --output-dir "dada2_$i"
done
# Keep only representative sequences of at least 50 bp.
qiime feature-table filter-seqs \
    --m-metadata-file dada2_single-end-demux-qiime2.qza/representative_sequences.qza \
    --i-data dada2_single-end-demux-qiime2.qza/representative_sequences.qza \
    --o-filtered-data representative_sequences_50_qiime2.qza \
    --p-where "length(sequence) > 49"
qiime feature-table filter-seqs \
    --m-metadata-file dada2_single-end-demux-standalone.qza/representative_sequences.qza \
    --i-data dada2_single-end-demux-standalone.qza/representative_sequences.qza \
    --o-filtered-data representative_sequences_50_standalone.qza \
    --p-where "length(sequence) > 49"
# Filter the feature tables down to the length-filtered sequences.
# BUG FIX: the next commands were fused onto the ends of the preceding
# lines (missing newlines); separated here.
qiime feature-table filter-features \
    --i-table dada2_single-end-demux-qiime2.qza/table.qza \
    --m-metadata-file representative_sequences_50_qiime2.qza \
    --o-filtered-table table_50_qiime2.qza
qiime feature-table filter-features \
    --i-table dada2_single-end-demux-standalone.qza/table.qza \
    --m-metadata-file representative_sequences_50_standalone.qza \
    --o-filtered-table table_50_standalone.qza
# Denoise the paired-end (unmerged) data as well.
qiime dada2 denoise-paired --i-demultiplexed-seqs paired-end-demux.qza \
    --p-trunc-len-f 0 --p-trunc-len-r 0 --p-n-threads 4 --output-dir dada2_paired
Filtering by length was not necessary here because the minimum sequence length was 231.
# BUG FIX: these two commands were hard-wrapped in the middle of tokens
# ("--output / -path" and "--i- / demultiplexed-seqs"), which breaks the
# shell; they are rejoined here.
qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' \
    --input-path manifest_unmerged.txt \
    --output-path paired-end-demux-standalone.qza \
    --input-format PairedEndFastqManifestPhred33V2
qiime dada2 denoise-paired \
    --i-demultiplexed-seqs paired-end-demux-standalone.qza \
    --p-trunc-len-f 0 --p-trunc-len-r 0 \
    --p-n-threads 4 --output-dir dada2_paired_standalone
For the qiime2-itsxpress data, we have to export the reads and run:
for i in *.fastq.gz; do reformat in=$i out=clean_$i minconsecutivebases=1; done
This script is from [BBMap](https://github.com/BioInfoTools/BBMap/blob/master/sh/reformat.sh).
Then import again and rerun everything. This is needed because of an ITSxpress bug that generates sequences of length 0. The next steps will be run on both the standalone and qiime2 data in order to cluster into OTUs.
# Quality-filter, dereplicate, cluster at 97% identity, and detect
# chimeras de novo for every imported demux artifact.
# BUG FIX: restricted the first glob to .qza files instead of "*".
for i in *.qza; do qiime quality-filter q-score --i-demux "$i" --output-dir "filterqscore_$i"; done
for i in filterqscore_*; do qiime vsearch dereplicate-sequences --i-sequences "$i/filtered_sequences.qza" --output-dir "derep_$i"; done
for i in derep_*; do qiime vsearch cluster-features-de-novo --i-sequences "$i/dereplicated_sequences.qza" --i-table "$i/dereplicated_table.qza" --p-perc-identity 0.97 --p-threads 4 --output-dir "cluster97_$i"; done
for i in cluster97_*; do qiime vsearch uchime-denovo --i-sequences "$i/clustered_sequences.qza" --i-table "$i/clustered_table.qza" --output-dir "chimera97_$i"; done
# Drop chimeric features from each clustered table.
# BUG FIX: the uchime loop above creates directories named
# "chimera97_cluster97_derep_..."; the originals referenced
# "chimera97_cluster_derep_..." (missing "97"), which does not exist.
qiime feature-table filter-features \
    --i-table cluster97_derep_filterqscore_single-end-demux-qiime2-reformat.qza/clustered_table.qza \
    --m-metadata-file chimera97_cluster97_derep_filterqscore_single-end-demux-qiime2-reformat.qza/nonchimeras.qza \
    --o-filtered-table chimera97_cluster97_derep_filterqscore_single-end-demux-qiime2-reformat.qza/table97-nonchimeras-qiime2.qza
qiime feature-table filter-features \
    --i-table cluster97_derep_filterqscore_single-end-demux-standalone.qza/clustered_table.qza \
    --m-metadata-file chimera97_cluster97_derep_filterqscore_single-end-demux-standalone.qza/nonchimeras.qza \
    --o-filtered-table chimera97_cluster97_derep_filterqscore_single-end-demux-standalone.qza/table97-nonchimeras-standalone.qza
# Remove low-frequency features (frequency < 2) from each non-chimeric table.
for i in chimera*; do
    qiime feature-table filter-features --i-table "$i"/table*.qza \
        --p-min-frequency 2 --o-filtered-table "$i/filtered_$i"
done
# Keep only the sequences that survived the frequency filter.
# BUG FIX: the loop above writes files named "filtered_chimera97_...",
# but the original find pattern was 'filtered-table*', which never
# matched anything; corrected to 'filtered_*'.
for i in chimera*/; do
    data_file=$(find "$i" -name 'nonchimeras*' -type f)
    table_file=$(find "$i" -name 'filtered_*' -type f)
    qiime feature-table filter-seqs \
        --i-data "$data_file" \
        --i-table "$table_file" \
        --o-filtered-data "${data_file%.qza}-filtered.qza"
done
# ==== DADA2 analysis of the ITSxpress-merged (standalone) reads ====
library(dada2)
packageVersion("dada2")
library(ShortRead)
packageVersion("ShortRead")
library(Biostrings)
packageVersion("Biostrings")

# BUG FIX: several statements in this section were fused onto single
# lines (e.g. 'packageVersion("Biostrings")path <- ...'), which does not
# parse in R; they are separated throughout.

# Directory containing the trimmed fastq files. CHANGE ME.
path <- "../ITS_corredor/HN00174206/data_REANAL_2024_LAST/ITSxpress_STANDALONE/ITSxpress_merged/"
head(list.files(path))

FWD <- "AACTTTYRRCAAYGGATCWCT"        # forward primer sequence. CHANGE ME.
REV <- "AGCCTCCGCTTATTGATATGCTTAART"  # reverse primer sequence. CHANGE ME.

# Return the four orientations (forward, complement, reverse, reverse
# complement) of a primer as a named character vector.
allOrients <- function(primer) {
  require(Biostrings)
  dna <- DNAString(primer)  # Biostrings works with DNAString objects
  orients <- c(Forward = dna, Complement = Biostrings::complement(dna),
               Reverse = Biostrings::reverse(dna),
               RevComp = Biostrings::reverseComplement(dna))
  return(sapply(orients, toString))  # convert back to character vector
}
FWD.orients <- allOrients(FWD)
REV.orients <- allOrients(REV)
FWD.orients  # inspect the primer orientations

# Pre-filter: remove reads containing Ns, writing into filtN/.
# BUG FIX: 'FWD.orients' and this assignment were fused on one line.
# NOTE(review): fnFs is never defined in this merged/single-end section
# -- presumably sort(list.files(path, pattern = ..., full.names = TRUE))
# as in the paired-end section below; confirm.
fnFs.filtN <- file.path(path, "filtN", basename(fnFs))
#filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE)
filterAndTrim(fnFs, fnFs.filtN, maxN = 0, multithread = TRUE)

# Count the reads in file fn that contain 'primer' (any position,
# IUPAC-aware via fixed = FALSE).
primerHits <- function(primer, fn) {
  nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE)
  return(sum(nhits > 0))
}
# Primer occurrence table (forward reads only; no reverse files here).
rbind(FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = fnFs.filtN[[1]]),
      #FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = fnRs.filtN[[1]]),
      REV.ForwardReads = sapply(REV.orients, primerHits, fn = fnFs.filtN[[1]]))
#REV.ReverseReads = sapply(REV.orients, primerHits, fn = fnRs.filtN[[1]])

# Quality filtering.
# BUG FIX: this assignment was swallowed by the commented-out line that
# preceded it (the '#' line and 'out <- filterAndTrim(...)' shared one
# physical line, so the call never ran).
# NOTE(review): filtFs is not defined in this section -- presumably
# file.path(path, "filtered", basename(fnFs)); confirm.
out <- filterAndTrim(fnFs, filtFs, maxN = 0, maxEE = 2, truncQ = 2,
                     minLen = 50, rm.phix = TRUE, compress = TRUE,
                     multithread = TRUE)  # on Windows, set multithread = FALSE
head(out)

# Denoise the forward reads.
# NOTE(review): derepFs and errF are not defined in this section -- the
# derepFastq()/learnErrors() steps appear to be missing from this paste;
# confirm against the paired-end section below.
dadaFs <- dada(derepFs, err = errF, multithread = TRUE)
#dadaFs2 <- dada(derepFs, err = errF, multithread = TRUE, OMEGA_A = 1e-40, pool = "pseudo")

# Remove chimeras by consensus.
# BUG FIX: this assignment was fused onto the end of the commented line
# above it and therefore never ran; separated here.
# NOTE(review): seqtab (makeSequenceTable output) is not defined in this
# section; confirm the missing step.
seqtab_nochim <- removeBimeraDenovo(seqtab, method = "consensus",
                                    multithread = TRUE, verbose = TRUE)
#seqtab_nochim2 <- removeBimeraDenovo(seqtab2, method = "consensus",
#                                     multithread = TRUE, verbose = TRUE)
dim(seqtab_nochim)

# Track read counts through the pipeline.
getN <- function(x) sum(getUniques(x))
# ?getUniques
stats <- cbind(out, sapply(dadaFs, getN), rowSums(seqtab_nochim))
colnames(stats) <- c("input", "filtered", "denoisedF", "nonchim")
head(stats)

# ---- ASV length distribution ----
# BUG FIX: 'head(stats)' and 'library(tidyverse)' were fused on one line.
library(tidyverse)
# The sequences themselves are the column names of the sequence table.
seqs <- colnames(seqtab)
# Count the letters ([A-Za-z]) in a string, i.e. the sequence length.
contar_letras <- function(cadena) {
  str_count(cadena, "[A-Za-z]")
}
# Apply the counter to every sequence.
conteo_letras <- map_int(seqs, ~ contar_letras(.x))
#print(conteo_letras)
min(conteo_letras)
max(conteo_letras)
mean(conteo_letras)

# ---- Taxonomic assignment (UNITE) ----
ruta_clasificador <- "../ITS_corredor/HN00174206/data_REANAL_2024_LAST/sh_general_release_dynamic_18.07.2023.fasta"  # from the UNITE website
taxa <- assignTaxonomy(seqtab_nochim, ruta_clasificador, multithread = TRUE, tryRC = TRUE)
# NOTE(review): seqtab_nochim2 is only created by a commented-out call
# above -- this line fails unless that call is re-enabled; confirm.
taxa2 <- assignTaxonomy(seqtab_nochim2, ruta_clasificador, multithread = TRUE, tryRC = TRUE)
# Inspect the assignment (drop the sequence rownames for readability).
taxa_print <- taxa
rownames(taxa_print) <- NULL
head(taxa_print)
dim(taxa_print)

# Save and export the results.
save(seqtab_nochim2, taxa2, file = "dada2_results2.RData")
write.csv(taxa, "taxonomy.csv")
#write.csv(seqtab_final, "table_final.csv", row.names = TRUE)
# NOTE(review): seqtab_final is not defined in this section (it is only
# built at the end of the paired-end section); confirm before running.
write_tsv(seqtab_final %>% rownames_to_column("#SampleID"), "table_final.txt")
write.csv(stats, "stats.csv")
#remotes::install_github("vmikk/metagMisc")
library(metagMisc)
dada_to_fasta(seqtab = seqtab, out = "seqtab.fasta")
#dada_to_fasta(seqtab = seqtab2, out = "seqtab2.fasta")

# ==== DADA2 analysis of the paired-end (standalone ITSxpress) reads ====
# BUG FIX: 'library(dada2)' was fused onto the end of the commented line
# above and was therefore never executed; separated here.
library(dada2)
packageVersion("dada2")
library(ShortRead)
packageVersion("ShortRead")
library(Biostrings)
packageVersion("Biostrings")

# Directory containing the fastq files. CHANGE ME.
path <- "../ITS_corredor/HN00174206/data_REANAL_2024_LAST/ITS_STANDALONE/"
head(list.files(path))

# Forward/reverse read files.
fnFs <- sort(list.files(path, pattern = "_R1.fastq.gz", full.names = TRUE))
fnRs <- sort(list.files(path, pattern = "_R2.fastq.gz", full.names = TRUE))

FWD <- "AACTTTYRRCAAYGGATCWCT"        # forward primer sequence. CHANGE ME.
REV <- "AGCCTCCGCTTATTGATATGCTTAART"  # reverse primer sequence. CHANGE ME.

# Return the four orientations of a primer as a character vector.
allOrients <- function(primer) {
  require(Biostrings)
  dna <- DNAString(primer)  # Biostrings works with DNAString objects
  orients <- c(Forward = dna, Complement = Biostrings::complement(dna),
               Reverse = Biostrings::reverse(dna),
               RevComp = Biostrings::reverseComplement(dna))
  return(sapply(orients, toString))
}
FWD.orients <- allOrients(FWD)
REV.orients <- allOrients(REV)
FWD.orients

# Remove reads containing Ns; write into the filtN/ subdirectory.
fnFs.filtN <- file.path(path, "filtN", basename(fnFs))
fnRs.filtN <- file.path(path, "filtN", basename(fnRs))
filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE)

# Count the reads in fn containing 'primer' (IUPAC-aware).
primerHits <- function(primer, fn) {
  nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE)
  return(sum(nhits > 0))
}
# Primer occurrence table before primer trimming.
rbind(FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = fnFs.filtN[[1]]),
      FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = fnRs.filtN[[1]]),
      REV.ForwardReads = sapply(REV.orients, primerHits, fn = fnFs.filtN[[1]]),
      REV.ReverseReads = sapply(REV.orients, primerHits, fn = fnRs.filtN[[1]]))
#cutadapt <- "C:/Users/HP/AppData/Local/Programs/Python/Python312/Scripts/cutadapt.exe"  # CHANGE ME to the cutadapt path on your machine
#system2(cutadapt, args = "--version")  # run shell commands from R
# Run cutadapt through the Windows Python launcher instead of a
# hard-coded .exe path.
python_launcher <- "py.exe"
output <- system2(python_launcher, args = c("-m", "cutadapt", "--version"), stdout = TRUE)
print(output)

# Output directory for primer-trimmed reads.
path.cut <- file.path(path, "cutadapt")
if (!dir.exists(path.cut)) dir.create(path.cut)
fnFs.cut <- file.path(path.cut, basename(fnFs))
fnRs.cut <- file.path(path.cut, basename(fnRs))

FWD.RC <- dada2:::rc(FWD)
REV.RC <- dada2:::rc(REV)
# Trim FWD and the reverse-complement of REV off R1 (forward reads).
R1.flags <- paste("-g", FWD, "-a", REV.RC)
# Trim REV and the reverse-complement of FWD off R2 (reverse reads).
R2.flags <- paste("-G", REV, "-A", FWD.RC)

# Run cutadapt on every sample.
# FIX: "--minimum-length=1" sat after the positional input files and the
# "# input files" comment was on the wrong line; options now precede the
# input-file arguments (same cutadapt behavior, clearer code).
for (i in seq_along(fnFs)) {
  system2(python_launcher,
          args = c("-m", "cutadapt", R1.flags, R2.flags,
                   "-n", 2,                              # -n 2: remove both FWD and REV
                   "--minimum-length=1",                 # drop empty reads
                   "-o", fnFs.cut[i], "-p", fnRs.cut[i], # output files
                   fnFs.filtN[i], fnRs.filtN[i]))        # input files
}
# Primer occurrence table after cutadapt (expected to be all zeros).
# BUG FIX: '}' of the loop above and this rbind were fused on one line.
rbind(FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = fnFs.cut[[1]]),
      FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = fnRs.cut[[1]]),
      REV.ForwardReads = sapply(REV.orients, primerHits, fn = fnFs.cut[[1]]),
      REV.ReverseReads = sapply(REV.orients, primerHits, fn = fnRs.cut[[1]]))

# Trimmed forward/reverse fastq files.
cutFs <- sort(list.files(path.cut, pattern = "_R1.fastq.gz", full.names = TRUE))
cutRs <- sort(list.files(path.cut, pattern = "_R2.fastq.gz", full.names = TRUE))

# Sample name = file-name prefix before the first underscore.
get.sample.name <- function(fname) strsplit(basename(fname), "_")[[1]][1]
sample.names <- unname(sapply(cutFs, get.sample.name))
head(sample.names)

# Quality filtering of the trimmed reads.
filtFs <- file.path(path.cut, "filtered", basename(cutFs))
filtRs <- file.path(path.cut, "filtered", basename(cutRs))
out <- filterAndTrim(cutFs, filtFs, cutRs, filtRs, maxN = 0, maxEE = c(2, 2),
                     truncQ = 2, minLen = 50, rm.phix = TRUE, compress = TRUE,
                     multithread = TRUE)  # on Windows, set multithread = FALSE
head(out)

# Learn the error rates.
errF <- learnErrors(filtFs, multithread = TRUE)
errR <- learnErrors(filtRs, multithread = TRUE)
#plotErrors(errF, nominalQ = TRUE)

# Denoise.
# NOTE(review): derepFs/derepRs are not defined in this section -- the
# derepFastq() step appears to be missing from this paste; confirm.
# BUG FIX: the reverse reads were denoised with the FORWARD error model
# (err = errF); they now use errR, which was computed but never used.
dadaFs <- dada(derepFs, err = errF, multithread = TRUE)
dadaRs <- dada(derepRs, err = errR, multithread = TRUE)

# Remove chimeras by consensus.
# NOTE(review): seqtab (mergePairs + makeSequenceTable) is not built in
# this section; 'mergers' below is likewise undefined -- confirm the
# missing steps.
seqtab_nochim <- removeBimeraDenovo(seqtab, method = "consensus",
                                    multithread = TRUE, verbose = TRUE)
dim(seqtab_nochim)

# Track reads through the pipeline.
getN <- function(x) sum(getUniques(x))
track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN),
               sapply(mergers, getN), rowSums(seqtab_nochim))
# If processing a single sample, remove the sapply calls: e.g. replace
# sapply(dadaFs, getN) with getN(dadaFs).
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
rownames(track) <- sample.names
head(track)

# ---- ASV length distribution ----
# BUG FIX: 'head(track)' and 'library(tidyverse)' were fused on one line.
library(tidyverse)
# The sequences themselves are the column names of the sequence table.
seqs <- colnames(seqtab)
# Count the letters ([A-Za-z]) in a string, i.e. the sequence length.
contar_letras <- function(cadena) {
  str_count(cadena, "[A-Za-z]")
}
# Apply the counter to every sequence.
conteo_letras <- map_int(seqs, ~ contar_letras(.x))
#print(conteo_letras)
min(conteo_letras)
max(conteo_letras)
mean(conteo_letras)

# ---- Taxonomic assignment (UNITE) ----
ruta_clasificador <- "../ITS_corredor/HN00174206/data_REANAL_2024_LAST/sh_general_release_dynamic_18.07.2023.fasta"  # from the UNITE website
taxa <- assignTaxonomy(seqtab_nochim, ruta_clasificador, multithread = TRUE, tryRC = TRUE)
# Inspect the assignment (drop the sequence rownames for readability).
taxa_print <- taxa
rownames(taxa_print) <- NULL
head(taxa_print)
dim(taxa_print)

# Save the results.
save(seqtab_nochim, taxa, file = "dada2_results_paired_final.RData")

# Build the final sample-by-ASV table with cleaned sample ids.
# BUG FIX: seqtab_final was written with write_tsv() BEFORE it was
# created (the definition sat several lines below the export); the
# definition is moved ahead of the export.
seqtab_final <- as.data.frame(seqtab_nochim) %>%
  rownames_to_column(var = "ids") %>%
  mutate(ids = str_extract(ids, "[0-9]+[A-Z]+")) %>%
  column_to_rownames(var = "ids")
write_tsv(seqtab_final %>% rownames_to_column("#SampleID"), "table3_no_hasids.txt")

#remotes::install_github("vmikk/metagMisc")
library(metagMisc)
# NOTE(review): seqtab is not defined in this section; confirm.
dada_to_fasta(seqtab = seqtab, out = "seqtab3.fasta")

# Figura 2. Alpha diversidad a todos los órdenes
Figura 3. Alpha diversidad por polígono al orden q=0
Figura 4. Alpha diversidad por polígono al orden q=1
Figura 5. Alpha diversidad por polígono al orden q=2